These data frames have been previously prepared, based on the data from 50 influential people in Seattle tech to follow on Twitter. The file can be seen here, and includes 3 sheets:
# Start from an empty workspace (kept from the original tutorial).
rm(list = ls())
##
# Download the workbook a single time and read every sheet from the local
# copy; handing the URL to each rio::import() call fetches the file again.
fileLink <- "https://github.com/eScience-UW/gentleIntro_networks/raw/main/data/seattleTop.xlsx"
localCopy <- tempfile(fileext = ".xlsx")
# mode = "wb" keeps the binary workbook intact on Windows.
download.file(fileLink, destfile = localCopy, mode = "wb")
edges <- rio::import(localCopy, which = "edges")
adjacency <- rio::import(localCopy, which = "adjacency")
attributes <- rio::import(localCopy, which = "attributes")
Use the previous dataframes to create the network:
library(igraph) # package needed
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Build an undirected graph from the edge-list data frame and show its summary.
EliteNet <- graph_from_data_frame(d = edges, directed = FALSE)
summary(EliteNet)
## IGRAPH 4205504 UN-- 46 393 --
## + attr: name (v/c)
# Prepare the matrix: the first column holds the labels, so it becomes the
# row index and is then dropped from the data itself.
row.names(adjacency) <- adjacency[[1]]
adjacency <- adjacency[, -1]
adjacencyMatrix <- as.matrix(adjacency)
# Rebuild the network, this time from the adjacency matrix.
EliteNet <- graph_from_adjacency_matrix(adjacencyMatrix, mode = "undirected")
summary(EliteNet)
## IGRAPH f973939 UN-- 46 393 --
## + attr: name (v/c)
See inside:
# List the vertex sequence of the network.
print(V(EliteNet))
## + 46/46 vertices, named, from f973939:
## [1] rachelerman mattmcilwain DaveParkerSEA toddbishop
## [5] ashannstew LeslieFeinzaig akipman matt_oppy
## [9] gilbert juliesandler BradSmi crashdev
## [13] ShaunaCausey john_gabbert moniguzman mattmday
## [17] Rich_Barton daryn lovelletters etzioni
## [21] MissDestructo heatherredman danshapiro medinism
## [25] KieranSnyder hadip RajSinghSeattle funcOfJoe
## [29] kirbywinfield stevesi Ryanintheus sonalpmane
## [33] SoGulley 2morrowknight jinman tarah
## [37] Jenerationy lanctot Kristen_Hammy nhuntwalker
## + ... omitted several vertices
# List the edge sequence of the network.
print(E(EliteNet))
## + 393/393 edges from f973939 (vertex names):
## [1] rachelerman--mattmcilwain rachelerman--DaveParkerSEA
## [3] rachelerman--toddbishop rachelerman--ashannstew
## [5] rachelerman--LeslieFeinzaig rachelerman--akipman
## [7] rachelerman--matt_oppy rachelerman--gilbert
## [9] rachelerman--juliesandler rachelerman--BradSmi
## [11] rachelerman--crashdev rachelerman--ShaunaCausey
## [13] rachelerman--john_gabbert rachelerman--moniguzman
## [15] rachelerman--mattmday rachelerman--Rich_Barton
## [17] rachelerman--daryn rachelerman--lovelletters
## [19] rachelerman--etzioni rachelerman--MissDestructo
## + ... omitted several edges
Attributes are added using vectors:
# Edge attribute: every tie receives the same weight.
EliteNet <- set_edge_attr(EliteNet, "weight", value = 1)
# E(EliteNet)$weight <- 1   # equivalent shortcut
# Vertex attributes, copied positionally from the attributes sheet.
# NOTE(review): this relies on the sheet rows being in vertex order -- confirm.
EliteNet <- set_vertex_attr(EliteNet, "male", value = attributes$male)
EliteNet <- set_vertex_attr(EliteNet, "followers", value = attributes$followers)
# V(EliteNet)$followers <- attributes$followers   # equivalent shortcut
##
# Ways to inspect what was stored:
# E(EliteNet)$weight
# edge_attr_names(EliteNet)
# V(EliteNet)$followers
# vertex_attr_names(EliteNet)
summary(EliteNet)
## IGRAPH f973939 UNW- 46 393 --
## + attr: name (v/c), male (v/n), followers (v/n), weight (e/n)
You can use attributes to color nodes:
library(ggraph)
## Loading required package: ggplot2
# Create the plot canvas for the network; layers are added on top of it.
base <- ggraph(graph = EliteNet)
## Using "stress" as default layout
# Label each node with its name, coloring the label by the `male` attribute,
# then draw faint edges and fix the two label colors.
base +
  geom_node_label(
    aes(label = name, color = as.factor(male)),
    repel = TRUE,
    show.legend = FALSE
  ) +
  geom_edge_link(alpha = 0.1) +
  scale_color_manual(values = c("red", "blue"))
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Is every vertex reachable from every other vertex?
is_connected(EliteNet)
## [1] TRUE
# Share of possible ties that are present
# (edge_density() is the current name of the deprecated graph.density()).
edge_density(EliteNet)
## [1] 0.3797101
# Length of the longest shortest path in the network.
diameter(EliteNet)
## [1] 3
## [1] 3
Assortativity: a measure of whether nodes tend to connect to other nodes that are similar to themselves. Values closer to 1 mean higher assortativity, values closer to -1 mean disassortativity, and 0 means no assortativity.
# Degree assortativity of the undirected network.
assortativity_degree(EliteNet, directed = FALSE)
## [1] -0.2541114
- Categorical assortativity: the tendency of nodes to connect with other nodes that share the same category.
# Assortativity over the categorical `male` vertex attribute.
assortativity_nominal(
  EliteNet,
  types = as.factor(V(EliteNet)$male),
  directed = FALSE
)
## [1] 0.04310211
- Assortativity (numerical):
# Assortativity over the numeric `followers` vertex attribute.
# The network is undirected, so `directed` is set to FALSE like the other
# assortativity calls in this script.
assortativity(EliteNet, V(EliteNet)$followers, directed = FALSE)
## [1] -0.04515662
The eigenvector centrality of a vertex tells you how well connected that vertex is; that is, vertices with the highest values are considered the most influential, as they are connected to vertices that are also well connected.
The closeness of a vertex tells you how close that vertex is to every other vertex. A vertex with high closeness can share information faster than the rest.
The betweenness of a vertex tells you how critical that vertex is for connecting vertices that are not directly connected.
# Three centrality scores per vertex.
eigen <- eigen_centrality(EliteNet, scale = FALSE)$vector
close <- closeness(EliteNet, normalized = TRUE)
betw <- betweenness(EliteNet, normalized = TRUE)
# Collect the scores in one data frame, one row per vertex.
DFCentrality <- data.frame(
  Eigenvector = eigen,
  Closeness = close,
  Betweenness = betw
)
# Move the vertex names from the row index into a regular column.
DFCentrality$person <- row.names(DFCentrality)
row.names(DFCentrality) <- NULL
library(ggplot2)
# Place each person by betweenness (x) and closeness (y); the text size
# reflects the eigenvector score.
ggplot(data = DFCentrality, mapping = aes(x = Betweenness, y = Closeness)) +
  theme_classic() +
  geom_text(
    aes(label = person, size = Eigenvector),
    alpha = 0.5,
    show.legend = FALSE
  )
The previous results tell us that two people are salient:
# Names of the two people with the highest eigenvector score.
HubNodes <- dplyr::top_n(DFCentrality, n = 2, wt = Eigenvector)[["person"]]
HubNodes
## [1] "DaveParkerSEA" "toddbishop"
Let’s highlight those guys:
NodeCount <- length(V(EliteNet))
# Label only the hub vertices and leave every other label empty.
# The vectorised form replaces the index loop, whose `seq(1:NodeCount)`
# would iterate over c(1, 2) if the network had no vertices.
V(EliteNet)$label <- ifelse(V(EliteNet)$name %in% HubNodes, V(EliteNet)$name, "")
library(ggraph)
# Fresh canvas, so the plot picks up the new `label` vertex attribute.
base <- ggraph(graph = EliteNet)
## Using "stress" as default layout
# Red labels (empty for non-hubs) over faint edges.
base +
  geom_node_label(
    aes(label = label),
    color = "red",
    repel = TRUE,
    show.legend = FALSE
  ) +
  geom_edge_link(alpha = 0.1)
# Random G(n, p) graph with as many vertices as EliteNet, used as a baseline.
# sample_gnp() is the current name of the deprecated erdos.renyi.game().
# NOTE(review): p = 0.5 is higher than the density reported for EliteNet
# earlier (about 0.38); edge_density(EliteNet) may be the intended baseline.
RandomNet <- sample_gnp(vcount(EliteNet), 0.5)
transitivity(RandomNet)
## [1] 0.5250664
transitivity(EliteNet)
## [1] 0.5504253
A set of nodes that belong to a group. The hard problem is to decide which node goes where:
# Edges whose removal partitions the network (the minimum cut).
mincut <- igraph::min_cut(EliteNet, value.only = FALSE)[["cut"]]
mincut
## + 2/393 edges from f973939 (vertex names):
## [1] DaveParkerSEA--2morrowknight MissDestructo--2morrowknight
# ends() returns a matrix of endpoint names; flatten it with c() and keep
# each name once.
labelsEdgesBye <- unique(c(ends(EliteNet, mincut)))
labelsEdgesBye
## [1] "DaveParkerSEA" "MissDestructo" "2morrowknight"
# Copy of the network without the cut edges.
EliteNet_cut <- delete_edges(EliteNet, mincut)
# Keep a label only for vertices touched by the cut; blank out the rest.
SubSetLabels <- V(EliteNet)$name
SubSetLabels[!(SubSetLabels %in% labelsEdgesBye)] <- ""
base <- ggraph(graph = EliteNet_cut) + geom_edge_link(alpha = 0.2)
## Using "stress" as default layout
base + geom_node_label(aes(label = SubSetLabels), repel = TRUE)
The Girvan–Newman algorithm builds partitions by repeatedly removing the edges with the highest betweenness. We can create something similar to the last result like this:
# Create all partitions (the full merge history) with edge betweenness.
# cluster_edge_betweenness() is the current name of the deprecated
# edge.betweenness.community().
partition_girvanNewman_all <- cluster_edge_betweenness(EliteNet, merges = TRUE)
## Warning in edge.betweenness.community(EliteNet, merges = T): At
## core/community/edge_betweenness.c:493 : Membership vector will be selected
## based on the highest modularity score.
# The first partition: cut the hierarchy into two groups.
partition_girvanNewman_first <- cut_at(partition_girvanNewman_all, 2)
ColorIndex <- partition_girvanNewman_first
base <- ggraph(graph = EliteNet) + geom_edge_link(alpha = 0.2)
## Using "stress" as default layout
# Group ids are categories, so they are mapped as a factor to get a discrete
# palette rather than a continuous gradient (as the Louvain plot does).
base +
  geom_node_point(aes(color = as.factor(ColorIndex)), show.legend = FALSE, size = 4) +
  geom_node_text(
    aes(label = label, color = as.factor(ColorIndex)),
    repel = TRUE,
    show.legend = FALSE
  )
A more complex algorithm is the Louvain:
# Detect communities with the Louvain method and store each vertex's
# community id as a vertex attribute.
partition_louvain <- cluster_louvain(EliteNet)
EliteNet <- set_vertex_attr(
  EliteNet,
  "louvain",
  value = membership(partition_louvain)
)
base <- ggraph(graph = EliteNet) + geom_edge_link(alpha = 0.2)
## Using "stress" as default layout
# One color per Louvain community.
base +
  geom_node_point(aes(color = as.factor(louvain)), size = 4, show.legend = FALSE)
# Cut the Girvan-Newman hierarchy into as many groups as the highest Louvain
# community id, so both partitions can be compared.
a_partition_girvanNewman <- cut_at(
  partition_girvanNewman_all,
  no = max(membership(partition_louvain))
)
If we get positive values (1 being the top value), we can consider that there is good community structure (wiki). The higher the modularity, the denser the connections between nodes within a partition and the sparser the connections between nodes in different partitions.
# Modularity of the Girvan-Newman cut ...
modularity(EliteNet, membership = a_partition_girvanNewman)
## [1] -0.0002751717
# ... versus the modularity of the Louvain partition.
modularity(EliteNet, membership = membership(partition_louvain))
## [1] 0.1106546
# Store both partitions as vertex attributes before exporting.
V(EliteNet)$louvain <- membership(partition_louvain)
# The girvanNewman attribute holds the Girvan-Newman cut computed earlier;
# it was previously filled with the Louvain membership by mistake.
V(EliteNet)$girvanNewman <- a_partition_girvanNewman
# Write the annotated network to disk in GraphML format.
write_graph(EliteNet, "EliteNet_R.graphml", "graphml")